Loading Packages

library(tidyverse)
# Load any additional packages
library(nycflights13)

Dataset

# Show the flights data set (made available by library(nycflights13) above)
# by evaluating it at top level so that it is auto-printed.
flights

filter()


Flights that had an arrival delay of two or more hours

# Keep rows whose arrival delay is at least 120 minutes (two hours).
flights |>
  filter(arr_delay >= 120)

Flights that flew to Houston (IAH or HOU)

# Keep rows whose destination is either Houston airport code.
# %in% tests membership in a set, replacing a chain of == joined by |.
flights |>
  filter(dest %in% c("IAH", "HOU"))

Flights that were operated by United, American, or Delta

# Keep rows whose carrier code is UA, AA, or DL.
# %in% tests membership in a set, replacing a chain of == joined by |.
flights |>
  filter(carrier %in% c("UA", "AA", "DL"))

Flights that departed in summer (July, August, and September)

# Keep rows whose month is 7, 8, or 9.
flights |>
  filter(month %in% 7:9)

Flights that arrived more than two hours late, but didn’t leave late

# Keep rows with an arrival delay above 120 minutes whose departure delay
# was zero or negative.
flights |>
  filter(arr_delay > 120 & dep_delay <= 0)

Flights that were delayed by at least an hour, but made up over 30 minutes in flight

# Keep rows that departed at least 60 minutes late and whose arrival delay
# is more than 30 minutes smaller than the departure delay.
flights |>
  filter(
    dep_delay >= 60,
    dep_delay - arr_delay > 30
  )

arrange()

Sort flights to find the most delayed flights.

# Order rows by departure delay, largest first.
flights |>
  arrange(desc(dep_delay))

Sort flights to find the fastest (highest speed) flights.

# Order rows by speed (distance divided by air time), highest first.
flights |>
  arrange(desc(distance / air_time))

mutate()

Create two new variables: one for the converted dep_time and one for the converted sched_dep_time, each expressed as minutes since midnight.

# Add two columns holding dep_time and sched_dep_time as minutes since
# midnight. Both inputs are treated as HHMM clock values: %/% 100 extracts
# the hour part and %% 100 the minute part.
# The trailing %% 1440 wraps a value of 2400 (midnight) to 0; without it,
# 2400 would convert to 1440, outside the 0-1439 range of a single day.
flights_with_new_columns <- flights |>
  mutate(
    converted_dep_time =
      ((dep_time %/% 100) * 60 + dep_time %% 100) %% 1440,
    converted_sched_dep_time =
      ((sched_dep_time %/% 100) * 60 + sched_dep_time %% 100) %% 1440
  )

Piping

Create your own Data Transformation

# Custom transformation: flights leaving JFK on January 1st.
# Steps: keep the matching rows, sort them by ascending air time, reduce to
# a few identifying columns, then label each flight by whether its departure
# delay exceeded 15 minutes.
flights |>
  filter(origin == "JFK" & month == 1 & day == 1) |>
  arrange(air_time) |>
  select(flight, tailnum, origin, dest, air_time, carrier, dep_delay) |>
  mutate(
    delay_category = ifelse(
      dep_delay > 15,
      "Delayed more than 15 mins",
      "On Time"
    )
  )